# !diagnostics off
################################################################################
################################################################################
##
##  Replication File for:
##  "Have State Policy Agendas Become More Nationalized?"
##
##  November 4, 2021
##  Copyright (c) Joseph L. Sutherland, Daniel Butler
##
################################################################################
################################################################################

# Load Packages
suppressPackageStartupMessages({
  library(tidyverse)
  library(readstata13)
  library(scales)
  library(stargazer)
  library(sandwich)
  library(lmtest)
})

# Set up space.
rm(list = ls())


################################################################################
# Figure 1
# Load data
war <- read_rds("./Data/WAR.RDS")

p <- war %>%
  ggplot(aes(x = year, y = pct, color = party)) +
  geom_point(alpha = 0.3) +
  geom_smooth(span = 0.20, se = F) +
  theme_classic() +
  scale_color_manual(values = c("blue", "red"), name = "Party") +
  scale_y_continuous(labels = percent, breaks = pretty_breaks(10)) +
  scale_x_continuous(breaks = pretty_breaks(20)) +
  annotate(
    "text",
    x = 1968,
    y = 0.0003,
    label = "Vietnam War",
    size = 4
  ) +
  annotate(
    "text",
    x = 1991,
    y = 0.00034,
    label = "Gulf War",
    size = 4
  ) +
  annotate(
    "text",
    x = 2003,
    y = 0.000363,
    label = "War in Afghanistan",
    size = 4
  ) +
  annotate(
    "text",
    x = 2003,
    y = 0.00035,
    label = "Iraq War",
    size = 4
  ) +
  labs(x = "\nYear",
       y = "Unigram Rate of Usage\n") +
  theme(panel.grid = element_blank(), title = element_text(face = "bold"))
p
ggsave(p,
       filename = "./warplot.png",
       width = 7.2,
       height = 7)

################################################################################
# Figure 2
df <- read_rds("./Data/SOTSDYADS.RDS")

# Baseline model
lm1 <- lm(sim ~ t , data = df)
summary(lm1)

# Baseline model, years broken out.
lm2 <- lm(sim ~ as.factor(year) + state.x - 1, data = df)
summary(lm2)

# Plot
predata <- expand.grid(
  year = (min(df$year)):(max(df$year)),
  state.x = df$state.x[1],
  state.y = df$state.y[1]
)

preds <- cbind(predata, predict(lm2, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )

p1 <- preds %>%
  filter(year >= 1960, year <= 2016) %>%
  ggplot(aes(x = year, y = fit)) +
  geom_errorbar(aes(ymin = bot, ymax = top), width = 0, col = "gray80") +
  geom_point(col = "gray60") +
  #geom_smooth(method = "loess", se = F, col="red", span=.8) +
  geom_smooth(method = "lm", se = F, col = "red") +
  scale_x_continuous(breaks = seq(1960, 2016, 4)) +
  scale_y_continuous(breaks = pretty_breaks()) +
  labs(x = "\nYear", y = "Average Interstate SOTS Similarity\n") +
  theme_classic() +
  theme(title = element_text(face = "bold"))
p1
ggsave(p1,
       filename = "./fig1.png",
       width = 7,
       height = 7)


################################################################################
# Figure 3
# Baseline model, years broken out.
lm.plot.west <-
  lm(
    sim ~  as.factor(year) + state.x - 1,
    data = df %>% filter(region.x == "West", region.y == "West")
  )
lm.plot.south <-
  lm(
    sim ~  as.factor(year) + state.x - 1,
    data = df %>% filter(region.x == "South", region.y == "South")
  )
lm.plot.midwest <-
  lm(
    sim ~  as.factor(year) + state.x - 1,
    data = df %>% filter(region.x == "Midwest", region.y == "Midwest")
  )
lm.plot.northeast <-
  lm(
    sim ~  as.factor(year) + state.x - 1,
    data = df %>% filter(region.x == "Northeast", region.y == "Northeast")
  )


# Plot similarity over years.
predata <- expand.grid(
  year = 1960:2016,
  state.x = (df %>% filter(region.x == "West"))$state.x[1],
  state.y = (df %>% filter(region.x == "West"))$state.y[1]
)
preds <-
  cbind(predata, predict(lm.plot.west, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )
predata <- expand.grid(
  year = 1960:2016,
  state.x = (df %>% filter(region.x == "South"))$state.x[1],
  state.y = (df %>% filter(region.x == "South"))$state.y[1]
)
preds2 <-
  cbind(predata, predict(lm.plot.south, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )
predata <- expand.grid(
  year = 1961:2016,
  state.x = (df %>% filter(region.x == "Midwest"))$state.x[1],
  state.y = (df %>% filter(region.x == "Midwest"))$state.y[1]
)
preds3 <-
  cbind(predata, predict(lm.plot.midwest, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )
predata <- expand.grid(
  year = 1960:2016,
  state.x = (df %>% filter(region.x == "Northeast"))$state.x[1],
  state.y = (df %>% filter(region.x == "Northeast"))$state.y[1]
)
preds4 <-
  cbind(predata, predict(lm.plot.northeast, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )

p4 <- rbind(
  preds %>% mutate(region = "West"),
  preds2 %>% mutate(region = "South"),
  preds3 %>% mutate(region = "Midwest"),
  preds4 %>% mutate(region = "Northeast")
) %>%
  mutate(bot = ifelse(bot < 0, 0, bot)) %>%
  ggplot(aes(x = year, y = fit)) +
  geom_errorbar(aes(ymin = bot, ymax = top), width = 0, col = "gray80") +
  geom_point(col = "gray60") +
  geom_smooth(method = "lm", se = F, col = "red") +
  scale_x_continuous(breaks = seq(1960, 2016, 4)) +
  scale_y_continuous(breaks = pretty_breaks(10)) +
  labs(x = "\nYear", y = "Average Interstate SOTS Similarity\n") +
  theme_classic() +
  facet_wrap( ~ region, ncol = 2) +
  theme(
    title = element_text(face = "bold", size = 20),
    strip.text = element_text(size = 15, face = "bold")
  )
p4

ggsave(p4,
       height = 12,
       width = 12,
       filename = "./fig3.png")


################################################################################
# Figure 4
df2 <- read_rds("./Data/SOTSSOTU.RDS")

lm.sotu.yearfes <- lm(sotu_sim ~ as.factor(year), data = df2)
summary(lm.sotu.yearfes)

predata <- expand.grid(year = (min(df2$year, na.rm = T)):(max(df2$year, na.rm =
                                                                T)))
preds <-
  cbind(predata, predict(lm.sotu.yearfes, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )
p1 <- preds %>%
  filter(year >= 1960, year <= 2016) %>%
  ggplot(aes(x = year, y = fit)) +
  geom_errorbar(aes(ymin = bot, ymax = top), width = 0, col = "gray80") +
  geom_point(col = "gray60") +
  geom_smooth(method = "lm", se = F, col = "red") +
  scale_x_continuous(breaks = seq(1960, 2016, 4)) +
  scale_y_continuous(breaks = pretty_breaks()) +
  labs(x = "\nYear", y = "Average Similarity Between the SOTU and SOTS Addresses\n") +
  theme_classic() +
  theme(title = element_text(face = "bold"))
p1

ggsave(p1,
       height = 7,
       width = 7,
       filename = "./fig4.png")



################################################################################
# APPENDIX
#
# Any appendix table or figure not included in this code was produced by hand.
#
################################################################################
# Figure A3
library(scatterplot3d)

coh <- read.csv("./Data/COHERENCE.CSV")
coh <- coh[, -1]
coh <- coh[, c(1, 3, 2)]

#Create a function to generate a continuous color palette
rbPal <- colorRampPalette(c('red', 'blue'))
color <- rbPal(10)[as.numeric(cut(coh$coh, breaks = 10))]

source('http://www.sthda.com/sthda/RDoc/functions/addgrids3d.r')

s3d <- scatterplot3d(
  coh,
  main = "Grid Optimization of Coherence",
  xlab = "Num. Topics (K)",
  ylab = "Winsorization Lower Bound (WLB)",
  zlab = "Coherence",
  pch = "",
  grid = F,
  box = FALSE,
  type = "h",
  color = color
)
addgrids3d(coh, grid = c("xy", "xz", "yz"))
s3d$points3d(coh, pch = 16, col = color)

################################################################################
# Table A4

# Baseline model
lm1 <- lm(sim ~ t , data = df)
summary(lm1)
coeftest(lm1, vcov. = vcovHC(lm1))

# Copartisans model
lm3 <- lm(sim ~ t + copartisans + state.x, data = df)
summary(lm3)
coeftest(lm3, vcov. = vcovHC(lm3))

################################################################################
# Fig A7
# Baseline model, years broken out.
lm.plot.d <-
  lm(sim ~  as.factor(year) + state.x - 1,
     data = df %>% filter(party.x == "D", party.y == "D"))
lm.plot.r <-
  lm(sim ~  as.factor(year) + state.x - 1,
     data = df %>% filter(party.x == "R", party.y == "R"))


# Plot similarity over years.
predata <- expand.grid(
  year = 1960:2016,
  state.x = (df %>% filter(party.x == "D"))$state.x[1],
  state.y = (df %>% filter(party.x == "D"))$state.y[1]
)
preds <-
  cbind(predata, predict(lm.plot.d, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )
predata <- expand.grid(
  year = 1960:2016,
  state.x = (df %>% filter(party.x == "R"))$state.x[1],
  state.y = (df %>% filter(party.x == "R"))$state.y[1]
)
preds2 <-
  cbind(predata, predict(lm.plot.r, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )

p5 <- rbind(preds %>% mutate(party = "Democrats"),
            preds2 %>% mutate(party = "Republicans")) %>%
  mutate(bot = ifelse(bot < 0, 0, bot)) %>%
  ggplot(aes(x = year, y = fit)) +
  geom_errorbar(aes(ymin = bot, ymax = top), width = 0, col = "gray80") +
  geom_point(col = "gray60") +
  geom_smooth(method = "lm", se = F, col = "red") +
  scale_x_continuous(breaks = seq(1960, 2016, 4)) +
  scale_y_continuous(breaks = pretty_breaks(10)) +
  labs(x = "\nYear", y = "Average Interstate SOTS Similarity\n") +
  theme_classic() +
  facet_wrap( ~ party, ncol = 2) +
  theme(
    title = element_text(face = "bold", size = 20),
    strip.text = element_text(size = 15, face = "bold")
  )
p5

ggsave(p5,
       height = 6,
       width = 12,
       filename = "./fig-party.png")


################################################################################
# Table A5 / Figure A8
df3 <- read_rds("./Data/SOTSDYADSRAW.RDS")

# Baseline model
lm1 <- lm(sim ~ t , data = df3)
summary(lm1)
coeftest(lm1, vcov = vcovHC(lm1))

# Baseline model, years broken out.
lm2 <- lm(sim ~ as.factor(year) + state.x - 1, data = df3)
summary(lm2)

# Plot
predata <- expand.grid(
  year = (min(df$year)):(max(df3$year)),
  state.x = df3$state.x[1],
  state.y = df3$state.y[1]
)

preds <- cbind(predata, predict(lm2, predata, se.fit = T)) %>%
  mutate(
    yhat = fit,
    bot = yhat - 1.96 * se.fit,
    top = yhat + 1.96 * se.fit
  )

p1 <- preds %>%
  filter(year >= 1960, year <= 2016) %>%
  ggplot(aes(x = year, y = fit)) +
  geom_errorbar(aes(ymin = bot, ymax = top), width = 0, col = "gray80") +
  geom_point(col = "gray60") +
  #geom_smooth(method = "loess", se = F, col="red", span=.8) +
  geom_smooth(method = "lm", se = F, col = "red") +
  scale_x_continuous(breaks = seq(1960, 2016, 4)) +
  scale_y_continuous(breaks = pretty_breaks()) +
  labs(x = "\nYear", y = "Average Interstate SOTS Similarity\n") +
  theme_classic() +
  theme(title = element_text(face = "bold"))
p1
ggsave(p1,
       filename = "./fig1-rawsims.png",
       width = 7,
       height = 7)


################################################################################
# Figure A9

rdat <- df %>%
  select(sim, year, state.x)

pr.yr <- rdat %>%
  group_by(year) %>%
  summarize(n = n()) %>%
  mutate(pi = n / sum(n))

predata <- expand.grid(
  year = (min(rdat$year)):(max(rdat$year)),
  state.x = rdat$state.x[1],
  state.y = rdat$state.y[1]
)

ri_each <- function(...) {
  year.shuffle <-
    sample(
      pr.yr$year,
      size = nrow(rdat),
      prob = pr.yr$pi,
      replace = T
    )
  rdat.null <- rdat %>%
    mutate(year = year.shuffle)
  lm.null <- lm(sim ~ as.factor(year) + state.x - 1, data = rdat.null)
  preds <- cbind(predata, predict(lm.null, predata, se.fit = T)) %>%
    mutate(
      yhat = fit,
      bot = yhat - 1.96 * se.fit,
      top = yhat + 1.96 * se.fit
    )
  lm.slope <- lm(fit ~ year, data = preds)
  coef(lm.slope)[2]
}

out <- parallel::mclapply(1:2000, ri_each, mc.cores = 13)

obs.slope <- 0.003099471
p.val <- sum(unlist(out) > obs.slope) / length(out)
p.val

p5 <- data.frame(y = unlist(out)) %>%
  ggplot(aes(x = y)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = obs.slope,
             lty = 2,
             col = "red") +
  theme_bw() +
  scale_y_continuous(labels = comma) +
  annotate("text",
           x = 0.0026,
           y = 400,
           label = "Observed Slope") +
  labs(x = "Simulated Slope Coefficient",
       y = "Count",
       title = "Permutation Test for SOTS Similarity Trends")
p5

################################################################################
# Table A6

# Baseline model
lm1 <- lm(sotu_sim ~ t , data = df2)
summary(lm1)
coeftest(lm1, vcov. = vcovHC(lm1))

# Copartisans model
lm3 <- lm(sotu_sim ~ t + copartisans + state.x, data = df2)
summary(lm3)
coeftest(lm3, vcov. = vcovHC(lm3))

################################################################################
# Figure A10

comb <- df %>%
  ungroup() %>%
  group_by(year, state.x) %>%
  summarize(sim = mean(sim)) %>%
  left_join(df2 %>%
              select(year, sim.sotu = sotu_sim, state.x = state))


p1 <- comb %>%
  filter(year >= 1960, year <= 2016) %>%
  mutate(year = ifelse(year < 1992, "PRE-1992", "POST-1992")) %>%
  ggplot(aes(x = sim, y = sim.sotu, color = year)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = F) +
  geom_abline(a = 0,
              b = 1,
              lty = 2,
              col = "gray70") +
  scale_color_manual(name = "Period", values = c("red", "black")) +
  scale_x_continuous(limits = c(0, 1), breaks = pretty_breaks(10)) +
  scale_y_continuous(limits = c(0, 1), breaks = pretty_breaks(10)) +
  labs(x = "Interstate SOTS Similarity", y = "SOTU Similarity") +
  theme_classic()
p1

ggsave(p1,
       filename = "./fig-simbyperiod.png",
       width = 7,
       height = 7)

lm6 <- lm(sim.sotu ~ sim + as.factor(year) + state.x, data = comb)
coeftest(lm6, .vcov = vcovHC("HC3"))
